//==============================================================================
// Project:		Wealth Transfers and their Economic Effects
// File name:	Under-representation of aged care residents in the HILDA sample
// Objective: 	Calculate the proportion of HILDA sample members who are
//				residents of nursing homes (both for the entire sample and the
//				sample excluding top-up members) and compare that to
//				population aggregates sourced from the AIHW and the ABS.
//
// Created: 	03/03/2021
// Modified: 	16/11/2021
//==============================================================================

**************************
*** Importing the data ***
**************************

clear	// clearing the workspace

import excel "Aged care data.xlsx", sheet("Sheet2") firstrow case(lower)	// importing the administrative data from AIHW

save "Aged care.dta", replace	// saving the data as a .dta file

clear	// clearing the workspace

import excel "ABS population data.xls", sheet("Sheet1") firstrow case(lower)	// importing population data sourced from the ABS (Cat. no. 3101.0)

save "ABS population data.dta", replace	// saving the data as a .dta file

clear	// clearing the workspace

use "HILDA_restricted_combined_rescaled.dta"	// importing the HILDA data

*********************
*** Data analysis ***
*********************

tab1 dodtyp if dodtyp >= 0, generate(freq)	// generating frequencies for the different dwelling types

by wave, sort: egen sample = count(xwaveid)	// calculating the number of enumerated persons in each wave of the HILDA sample

by wave, sort: egen nursing_home_sample = count(xwaveid) if dodtyp == 1	//	calculating the number of nursing home residents who are enumerated in each wave of the HILDA sample

collapse freq1 sample nursing_home_sample, by(wave)	// collapsing the dataset by wave and keeping the key variables

generate year = wave + 2000	// generating a year variable

rename freq1 percent_nursinghome_HILDA	// renaming the nursing home frequency variable 'freq1' 'percent_nursinghome_HILDA'

save "HILDA nursing homes.dta", replace	// saving the dataset

****************************************
*** Data analysis exc. top-up sample ***
****************************************

clear	// clearing the workspace

use "HILDA_restricted_combined_rescaled.dta"	// importing the HILDA data

keep if hhtup == 0	// dropping respondents who are not members of the top-up sample

tab1 dodtyp if dodtyp >= 0, generate(freq)	// generating frequencies for the different dwelling types

by wave, sort: egen sample_excTU = count(xwaveid)	// calculating the number of enumerated persons in each wave of the HILDA sample

by wave, sort: egen nursing_home_sample_excTU = count(xwaveid) if dodtyp == 1	// calculating the number of nursing home residents who are enumerated in each wave of the HILDA sample

rename freq1 nursing_home_percent_excTU	// renaming the nursing home frequency variable 'freq1' 'nursing_home_percent_excTU'

collapse nursing_home_percent_excTU sample_excTU nursing_home_sample_excTU, by(wave)	// collapsing the dataset by wave and keeping the key variables

gen year = wave + 2000	// generating a year variable

drop wave	// dropping the wave variable

save "HILDA nursing homes exc top-up.dta", replace	// saving the dataset

******************************************
*** Merging in the administrative data ***
******************************************

use "HILDA nursing homes.dta" // opening the HILDA nursing homes dataset, which includes members of the top-up sample

merge m:m year using "ABS population data.dta", nogen	// merging in the ABS population data

merge m:m year using "Aged care.dta", nogen	// merging in the AIHW aged care data

generate percent_residential_care = residentialcare / popdecember	// generating a proportion living in residential aged care variable

drop wave pop1000sdecember transitioncare homecare	// dropping unnecessary variables

save "HILDA nursing homes combined.dta", replace	// saving the dataset

****************************************************
*** Merging in the HILDA exc. top-up sample data ***
****************************************************

merge 1:1 year using "HILDA nursing homes exc top-up.dta", nogen	// merging in the dataset based on the HILDA sample excluding members of the top-up sample

replace nursing_home_percent_excTU = percent_nursinghome_HILDA if year <= 2010	// replacing the top-up sample variables equal to the entire sample variables for years prior to 2011, when the top-up sample was introduced

replace sample_excTU = sample if year <= 2010	// replacing the top-up sample variables equal to the entire sample variables for years prior to 2011, when the top-up sample was introduced

replace nursing_home_sample_excTU = nursing_home_sample if year <= 2010	// replacing the top-up sample variables equal to the entire sample variables for years prior to 2011, when the top-up sample was introduced

**************************
*** Exporting the data ***
**************************

order year sample nursing_home_sample percent_nursinghome_HILDA popdecember residentialcare percent_residential_care sample_excTU nursing_home_sample_excTU nursing_home_percent_excTU	// re-ordering the variables

save "HILDA nursing homes combined.dta", replace	// saving the dataset